*Command-line arguments of this do-file:
*   1    project root directory; config.do is included relative to it
*   2-5  optional values for the globals weight_category, weight_versions,
*        weight_window and wtype (in that order). The placeholder
*        ___EMPTY___ means that the argument was not supplied.
global root_dir = "`1'"

include "$root_dir/code/config/config.do"

*The log path is quoted so that a log_dir containing spaces still works.
*cap noi: a failure to open the log is shown but does not stop the script.
cap noi log using "${log_dir}/predicted_vars.log", replace name(dat)

*Handle empty arguments: map the placeholder to an empty string
global arg1 = cond("`2'" == "___EMPTY___", "", "`2'")
global arg2 = cond("`3'" == "___EMPTY___", "", "`3'")
global arg3 = cond("`4'" == "___EMPTY___", "", "`4'")
global arg4 = cond("`5'" == "___EMPTY___", "", "`5'")

*A global is only overwritten when its argument is non-empty; otherwise it
*keeps whatever value it already has (presumably set in config.do - verify).
if "$arg1" != "" {
    global weight_category "$arg1"
    di "Weight category: ${weight_category}"
}

if "$arg2" != "" {
    global weight_versions "$arg2"
    di "Weight versions: ${weight_versions}"
}

if "$arg3" != "" {
    global weight_window "$arg3"
    di "Weight window: ${weight_window}"
}

if "$arg4" != "" {
	global wtype "$arg4"
}
*wtype is echoed unconditionally, so an empty line is printed when it is unset
di "${wtype}"
*Everything up to the matching closing brace runs under capture noisily:
*output is still shown, and an error stops the block without aborting the
*do-file, so that the return code _rc can be checked after the block.
capture noi {
set more off

*In this file we create the predicted wages, labor productivity and GDP for table A15.

*1. 		Estimate the rhos for low skill wages

*2. 		Predict the low skill wages

*3.			Predict high skill wages

*4.			Predict labor productivity

*5.			Predict GDP per capita

*6. 		Put all the predictions together

*7. 		Deflate all variables by ppi and convert into USD using 1995 exchange rates.

*8. 		Reformat and add country codes. This was originally done in another file and was moved here so that all predicted variables are created in one file.



**********************************************************************
**********1. Estimate the rhos for low skill wages *******************
**********************************************************************

*Import the data for all codes and keep the manufacturing sector (code D) only
use "${final_dir}/wage_combined_final_allcodes.dta", clear
keep if code=="D"

*Keep the relevant variables
keep year country code ls_wage hs_wage va_emp cap_GDP_nom_own VA_P_95 value_1995

*Keep the rows for 1995 - 2016. Observed values are only used up to 2009
*(everything later is set to missing below). The rows for 2010 - 2016 stay in
*the data because the predictions made later in this file reach up to 7 years
*ahead (2009 + 7 = 2016) and need rows to be stored in.
keep if year>1994 & year<2017

foreach v of varlist ls_wage hs_wage va_emp cap_GDP_nom_own VA_P_95 value_1995 {
    replace `v'=. if year>2009
}

*Generate the log of the variables
foreach v of varlist ls_wage hs_wage va_emp cap_GDP_nom_own {
    gen ln_`v' = log(`v')
}

*Numeric country identifier, needed for tsset and for the dummies
egen ctry = group(country)

*Country-specific linear time trends: one dummy per country, multiplied by year.
*The loop runs over whatever dummies tabulate created, so it does not depend
*on the number of countries in the data.
tabulate ctry, gen(ctry_year)
foreach v of varlist ctry_year* {
    replace `v' = `v'*year
}

*Define the panel data (this also sorts the data by ctry and year)
tsset ctry year 

*Common starting point that sections 3 to 5 reload
tempfile start_file
save `start_file', replace

************************************************************
************* 1. Different rho for each country ************
************************************************************

*Interact the log low skill wage with a full set of country dummies, so that
*the lag of each interaction carries the rho of one country.
*The loop runs over whatever dummies tabulate created, so it does not depend
*on the number of countries in the data.
tabulate ctry, gen(ctry_ln_ls_wage)

foreach v of varlist ctry_ln_ls_wage* {
    replace `v' = `v'*ln_ls_wage
}

*One pooled regression on the years up to 2009 with a separate rho per country
*(lagged interactions), country-specific time trends (ctry_year) and country
*intercepts (i.ctry)
regress ln_ls_wage L.ctry_ln_ls_wage* ctry_year* i.ctry if year <2010, robust

*test for joint significance of the rhos
testparm L.ctry_ln_ls_wage*

*test if the rhos are different from each other
testparm L.ctry_ln_ls_wage*, equal



***********************************************************
**********2. Predict low skill wages **********************
***********************************************************

*This section continues with the data in memory and needs only the low skill wage
drop hs_wage ln_hs_wage va_emp ln_va_emp cap_GDP_nom_own ln_cap_GDP_nom_own VA_P_95 value_1995

*Approach: estimate an AR(1) with a linear trend per country, iterate the
*prediction 7 steps ahead and use the average of the predicted wages for
*t+2 till t+7 as the predicted wage of year t.

local iso_list aus aut bel bgr bra can che chn cyp cze deu dnk esp est fin fra gbr grc hun idn ind irl ita jpn kor ltu lux lva mex mlt nld pol prt rou rus svk svn swe tur twn usa

*Backup of the observed series, because ln_ls_wage is overwritten while iterating
generate original_ln_ls_wage = ln_ls_wage

foreach iso of local iso_list {
    regress ln_ls_wage L.ln_ls_wage year if country=="`iso'" & year <2010, robust
    display "rho in country: `iso'"
    display _b[L.ln_ls_wage]

    *remember the rho of this country, it is saved to a file further down
    local rho_`iso' = _b[L.ln_ls_wage]

    *Step 1 uses the observed lag. Every further step feeds the previous
    *prediction back in as the lagged wage and predicts again.
    predict ln_ls_wage_t_1 if country=="`iso'", xb
    forvalues h = 2/7 {
        local prev = `h' - 1
        replace ln_ls_wage = ln_ls_wage_t_`prev' if country=="`iso'"
        predict ln_ls_wage_t_`h' if country=="`iso'", xb
    }

    *Restore the observed series
    replace ln_ls_wage = original_ln_ls_wage

    *Step 1 is only a stepping stone and does not enter the average
    drop ln_ls_wage_t_1

    *Move the h-step prediction h rows up, so that it sits in the row of the
    *year it was made from, and convert it from logs to levels
    forvalues h = 2/7 {
        replace ln_ls_wage_t_`h' = ln_ls_wage_t_`h'[_n+`h'] if country=="`iso'"
        generate ls_wage_t_`h' = exp(ln_ls_wage_t_`h')
        drop ln_ls_wage_t_`h'
    }

    *Average over the horizons 2 to 7 (missing outside this country)
    egen p_ls_wage_s_`iso' = rowmean(ls_wage_t_*)
    drop ls_wage_t_*
}

*Collapse the per-country columns into one variable
egen p_ls_wage_s = rowmean(p_ls_wage_s_*)
drop p_ls_wage_s_*

keep if year<=2009
keep year country code ls_wage p_*

tempfile predicted_ls_wages
save `predicted_ls_wages', replace


*Save the rhos, one row per country, into the final folder
keep country
duplicates drop
generate rho = .
foreach iso of local iso_list {
    replace rho = `rho_`iso'' if country=="`iso'"
}

save ${final_dir}/predicted_lsw_rhos.dta, replace



***********************************************************
**********3. Predict high skill wages *********************
***********************************************************
use `start_file', clear
drop ls_wage ln_ls_wage va_emp ln_va_emp cap_GDP_nom_own ln_cap_GDP_nom_own VA_P_95 value_1995

*Approach: estimate an AR(1) with a linear trend per country, iterate the
*prediction 7 steps ahead and use the average of the predicted wages for
*t+2 till t+7 as the predicted wage of year t.

local iso_list aus aut bel bgr bra can che chn cyp cze deu dnk esp est fin fra gbr grc hun idn ind irl ita jpn kor ltu lux lva mex mlt nld pol prt rou rus svk svn swe tur twn usa

*Backup of the observed series, because ln_hs_wage is overwritten while iterating
generate original_ln_hs_wage = ln_hs_wage

foreach iso of local iso_list {
    regress ln_hs_wage L.ln_hs_wage year if country=="`iso'" & year <2010, robust
    display "rho in country: `iso'"
    display _b[L.ln_hs_wage]

    *remember the rho of this country, it is saved to a file further down
    local rho_`iso' = _b[L.ln_hs_wage]

    *Step 1 uses the observed lag. Every further step feeds the previous
    *prediction back in as the lagged wage and predicts again.
    predict ln_hs_wage_t_1 if country=="`iso'", xb
    forvalues h = 2/7 {
        local prev = `h' - 1
        replace ln_hs_wage = ln_hs_wage_t_`prev' if country=="`iso'"
        predict ln_hs_wage_t_`h' if country=="`iso'", xb
    }

    *Restore the observed series
    replace ln_hs_wage = original_ln_hs_wage

    *Step 1 is only a stepping stone and does not enter the average
    drop ln_hs_wage_t_1

    *Move the h-step prediction h rows up, so that it sits in the row of the
    *year it was made from, and convert it from logs to levels
    forvalues h = 2/7 {
        replace ln_hs_wage_t_`h' = ln_hs_wage_t_`h'[_n+`h'] if country=="`iso'"
        generate hs_wage_t_`h' = exp(ln_hs_wage_t_`h')
        drop ln_hs_wage_t_`h'
    }

    *Average over the horizons 2 to 7 (missing outside this country)
    egen p_hs_wage_s_`iso' = rowmean(hs_wage_t_*)
    drop hs_wage_t_*
}

*Collapse the per-country columns into one variable
egen p_hs_wage_s = rowmean(p_hs_wage_s_*)
drop p_hs_wage_s_*

keep if year<=2009
keep year country code hs_wage p_*

tempfile predicted_hs_wages
save `predicted_hs_wages', replace

*Save the rhos, one row per country, into the final folder
keep country
duplicates drop
generate rho = .
foreach iso of local iso_list {
    replace rho = `rho_`iso'' if country=="`iso'"
}

save ${final_dir}/predicted_hsw_rhos.dta, replace


***********************************************************
**********4. Predict labor productivity *******************
***********************************************************
use `start_file', clear
drop ls_wage ln_ls_wage hs_wage ln_hs_wage cap_GDP_nom_own ln_cap_GDP_nom_own VA_P_95 value_1995

*Approach: estimate an AR(1) with a linear trend per country, iterate the
*prediction 7 steps ahead and use the average of the predicted va_emp for
*t+2 till t+7 as the predicted va_emp of year t.
*The rhos are not saved here. To save them, copy the code from the wage sections.

local iso_list aus aut bel bgr bra can che chn cyp cze deu dnk esp est fin fra gbr grc hun idn ind irl ita jpn kor ltu lux lva mex mlt nld pol prt rou rus svk svn swe tur twn usa

*Backup of the observed series, because ln_va_emp is overwritten while iterating
generate original_ln_va_emp = ln_va_emp

foreach iso of local iso_list {
    regress ln_va_emp L.ln_va_emp year if country=="`iso'" & year <2010, robust

    *Step 1 uses the observed lag. Every further step feeds the previous
    *prediction back in as the lagged value and predicts again.
    predict ln_va_emp_t_1 if country=="`iso'", xb
    forvalues h = 2/7 {
        local prev = `h' - 1
        replace ln_va_emp = ln_va_emp_t_`prev' if country=="`iso'"
        predict ln_va_emp_t_`h' if country=="`iso'", xb
    }

    *Restore the observed series
    replace ln_va_emp = original_ln_va_emp

    *Step 1 is only a stepping stone and does not enter the average
    drop ln_va_emp_t_1

    *Move the h-step prediction h rows up, so that it sits in the row of the
    *year it was made from, and convert it from logs to levels
    forvalues h = 2/7 {
        replace ln_va_emp_t_`h' = ln_va_emp_t_`h'[_n+`h'] if country=="`iso'"
        generate va_emp_t_`h' = exp(ln_va_emp_t_`h')
        drop ln_va_emp_t_`h'
    }

    *Average over the horizons 2 to 7 (missing outside this country)
    egen p_va_emp_s_`iso' = rowmean(va_emp_t_*)
    drop va_emp_t_*
}

*Collapse the per-country columns into one variable
egen p_va_emp_s = rowmean(p_va_emp_s_*)
drop p_va_emp_s_*

keep if year<=2009
keep year country code va_emp p_*

tempfile predicted_va_emp
save `predicted_va_emp', replace



***********************************************************
**********5. Predict GDP per capita ***********************
***********************************************************
use `start_file', clear
drop ls_wage ln_ls_wage hs_wage ln_hs_wage va_emp ln_va_emp

*Shorter names, because the derived variable names would otherwise be too long
rename ln_cap_GDP_nom_own ln_cap_GDP
rename cap_GDP_nom_own cap_GDP

*Approach: estimate an AR(1) with a linear trend per country, iterate the
*prediction 7 steps ahead and use the average of the predicted GDP per capita
*for t+2 till t+7 as the predicted GDP per capita of year t.

local iso_list aus aut bel bgr bra can che chn cyp cze deu dnk esp est fin fra gbr grc hun idn ind irl ita jpn kor ltu lux lva mex mlt nld pol prt rou rus svk svn swe tur twn usa

*Backup of the observed series, because ln_cap_GDP is overwritten while iterating
generate original_ln_cap_GDP = ln_cap_GDP

foreach iso of local iso_list {
    regress ln_cap_GDP L.ln_cap_GDP year if country=="`iso'" & year <2010, robust

    *Step 1 uses the observed lag. Every further step feeds the previous
    *prediction back in as the lagged value and predicts again.
    predict ln_cap_GDP_t_1 if country=="`iso'", xb
    forvalues h = 2/7 {
        local prev = `h' - 1
        replace ln_cap_GDP = ln_cap_GDP_t_`prev' if country=="`iso'"
        predict ln_cap_GDP_t_`h' if country=="`iso'", xb
    }

    *Restore the observed series
    replace ln_cap_GDP = original_ln_cap_GDP

    *Step 1 is only a stepping stone and does not enter the average
    drop ln_cap_GDP_t_1

    *Move the h-step prediction h rows up, so that it sits in the row of the
    *year it was made from, and convert it from logs to levels
    forvalues h = 2/7 {
        replace ln_cap_GDP_t_`h' = ln_cap_GDP_t_`h'[_n+`h'] if country=="`iso'"
        generate cap_GDP_t_`h' = exp(ln_cap_GDP_t_`h')
        drop ln_cap_GDP_t_`h'
    }

    *Average over the horizons 2 to 7 (missing outside this country)
    egen p_cap_GDP_s_`iso' = rowmean(cap_GDP_t_*)
    drop cap_GDP_t_*
}

*Collapse the per-country columns into one variable
egen p_cap_GDP_s = rowmean(p_cap_GDP_s_*)
drop p_cap_GDP_s_*

*The deflator and the 1995 exchange rate stay in the data for section 7.
*The result is not saved: section 6 merges the other predictions onto the
*data in memory.
keep if year<=2009
keep year country code cap_GDP p_* VA_P_95 value_1995

*******************************************************************
*********** 6. Put all the predictions together *******************
*******************************************************************

*The GDP per capita predictions are in memory. Add the low skill wage, high
*skill wage and labor productivity predictions, matching on year, country and
*code. The merge indicator is not kept.
foreach part in predicted_ls_wages predicted_hs_wages predicted_va_emp {
    merge 1:1 year country code using ``part'', nogenerate
}

**********************************************************************************************
****** 7. Deflate all variables by ppi and convert into USD using 1995 exchange rates ********
**********************************************************************************************

*Naming: in a variable name, j stands for jointly (one rho for all countries)
*and s for separate (one rho per country). A t_4 in the name marks the value
*predicted for t+4. Without it, the variable is the average over the values
*predicted for t+2 till t+7.

*Note on the exchange rate, taken over from the original comments: value_1995
*is constructed such that it converts the local currency to USD until the Euro
*is introduced and the Euro to USD from then on. The conversion therefore has
*to be corrected for the countries that adopted the Euro after 1995.
*NOTE(review): the multipliers below look like the fixed national currency per
*Euro conversion rates - confirm against the source of value_1995.
local euro_pairs svn 239.64 svk 30.1260 mlt 0.429300 cyp 0.585274 est 15.6466 esp 166.386 aut 13.7603 bel 40.3399 fin 5.94573 fra 6.55957 deu 1.95583 grc 340.750 irl 0.787564 ita 1936.27 nld 2.20371 prt 200.482 lux 40.3399

foreach stem in ls_wage hs_wage va_emp cap_GDP {

    *Deflate by PPI
    generate p_`stem'_s_pi_95 = p_`stem'_s/VA_P_95*100

    *Divide the deflated variable by the exchange rate in 1995
    generate p_`stem'_s_pi_USD_95 = p_`stem'_s_pi_95/value_1995

    *Apply the correction factor country by country (pairs of code and factor)
    local todo `euro_pairs'
    while "`todo'" != "" {
        gettoken iso todo : todo
        gettoken rate todo : todo
        replace p_`stem'_s_pi_USD_95 = p_`stem'_s_pi_USD_95*`rate' if country=="`iso'"
    }

    *The undeflated and the deflated-only versions are not needed any more
    drop p_`stem'_s p_`stem'_s_pi_95
}

*Lithuania is a special case because its GDP data is always in Euro while its
*wage data is always in local currency. Thus only the GDP data has to be
*multiplied by the last exchange rate between the Euro and the local currency.
replace p_cap_GDP_s_pi_USD_95 = p_cap_GDP_s_pi_USD_95*3.4528 if country=="ltu"

drop VA_P_95 value_1995

label variable p_ls_wage_s_pi_USD_95 "predicted low skill wage in USD 95"
label variable p_hs_wage_s_pi_USD_95 "predicted high skill wage in USD 95"
label variable p_va_emp_s_pi_USD_95 "predicted labor productivity in USD 95"
label variable p_cap_GDP_s_pi_USD_95 "predicted GDP per capita in USD 95"

*Section 8 reloads this file once per output dataset
tempfile predicted
save `predicted', replace

*******************************************
**8. reformatting and adding countrycodes**
*******************************************

*One wide dataset is written per predicted variable. The two lists are
*parallel: the k-th output tag belongs to the k-th variable stem.
local out_tags  lsw hsw vaemp gdppc
local src_stems ls_wage hs_wage va_emp cap_GDP

forvalues k = 1/4 {
    local tag : word `k' of `out_tags'
    local stem : word `k' of `src_stems'

    use `predicted', clear
    keep year country p_`stem'*

    *Attach the ctry code via the upper-case country code. Rows without a
    *match in the lookup file are kept (unmatched(master)).
    *NOTE(review): mmerge is presumably the user-written command from SSC -
    *confirm that it is installed.
    generate iso_code = upper(country)
    mmerge iso_code using ${commondata_dir}/patstat_2018b/country_codes.dta, unmatched(master) umatch(iso) ukeep(ctry)
    drop country _m iso_code

    *One row per year and one column per ctry code
    rename p_`stem'_s_pi_USD_95 `tag'Ps_
    reshape wide `tag'Ps_, i(year) j(ctry) string
    compress
    save ${dataset_dir}/indep_vars/`tag'_predicted_wide_MANUF.dta, replace
}


}
*Report the outcome of the captured block. This check has to come directly
*after the closing brace, because _rc holds the return code of the most
*recent capture.
if _rc != 0 {
    display "Execution finished with errors."
}
else {
    display "Execution finished successfully."
}

*Close the log opened at the top. capture: no error if it is not open.
capture log close dat